clear all
set more off, perm
set mem 10000000
set matsize 10000
version 13

***************************************************** 
*** DD regressions: reduced-form Census outcomes ****
***************************************************** 

** Set file paths
do "$path_code/paths.do"

** Set graph scheme
cd "$path/code/analyze"
set scheme fb, perm

********************************************************************************
********************************************************************************

** 1. Run difference in discontinuities for Census outcomes (where possible)
{
use "$panel/panel_dataset_dd.dta", clear
keep if vplan4<11 & corr_state==1 & sample==1 & sample_h==1 & sing_h==1 & pop_non_zero==1 

merge 1:1 pca01_id st_code dt_code vplan4 vdpr4 year using ///
	"$panel/panel_dataset_lights_allyears.dta", keep(3) nogen
egen temp1 = max(lights_max*(year==2001)), by(pca01_id)
egen temp2 = max(lights_max*(year==2011)), by(pca01_id)
gen lights_diff = abs(temp2-temp1)
assert lights_diff!=.
	
egen stdtbk = group(stdt bk_code)
gen X = (tot_p01-300)
assert X!=.
gen D = X>=0
gen DX = D*X

gen aw150 = abs(150-abs(300-tot_p01))
gen aw100 = abs(100-abs(300-tot_p01))
	
preserve
use "$panel/panel_dataset_full.dta", clear
egen temp1 = mean(vdp_pwr_h_all_avg) if vdp_pwr_h_all_avg_11>0 & vdp_pwr_h_all_avg_11!=., by(st_code dt_code)
egen HRS_all_wide = mode(temp1), by(st_code dt_code)
egen temp2 = mean(vdp_pwr_h_dom_avg) if vdp_pwr_h_dom_avg_11>0 & vdp_pwr_h_dom_avg_11!=., by(st_code dt_code)
egen HRS_dom_wide = mode(temp2), by(st_code dt_code)
gen HRS_all = HRS_all_wide>=10 & HRS_all_wide!=.
gen HRS_dom = HRS_dom_wide>=10 & HRS_dom_wide!=.
replace HRS_all = . if HRS_all_wide==.
replace HRS_dom = . if HRS_dom_wide==.
keep st_code dt_code HRS_all HRS_dom
duplicates drop
tempfile hrs
save `hrs'
restore
merge m:1 st_code dt_code using `hrs', nogen
drop if pca01_id==.

cap drop H
gen H = HRS_all

	// Create index of VD variables
local vd_index_vars = "vd_com_d_phone_ll vd_com_d_post_off vd_edu_n_p_sch vd_edu_n_m_sch vd_edu_n_s_sch " ///
					+ "vd_edu_n_s_s_sch vd_edu_n_coll vd_edu_n_tr_sch vd_fin_d_ac_soc vd_fin_d_bank vd_fin_d_comm_bank " /// 
					+ "vd_fin_d_coop_bank vd_geo_a_irr vd_hea_n_alt_hosp vd_hea_n_fw_cntr vd_hea_n_mcw_cntr vd_hea_n_ph_subcntr " ///
					+ "vd_tra_d_bus vd_wat_d_any"
foreach v of varlist `vd_index_vars' {
	qui sum `v' if tot_p01<=1000 & year==2011
	gen Z`v'11 = (`v' - r(mean))/(r(sd)) if year==2011
	qui sum `v' if tot_p01<=1000 & year==2001
	gen Z`v'01 = (`v' - r(mean))/(r(sd)) if year==2001
}
egen double Zindex_vd11 = rmean(Zvd*11)
egen double Zindex_vd01 = rmean(Zvd*01)
gen index_vd = Zindex_vd11 if year==2011
replace index_vd = Zindex_vd01 if year==2001
assert index_vd!=.
drop Z*

	// Define DD outcomes
global vars_demo = "pct_06 hpca_lit_p"
global vars_labor_pct = "work_pooled_ag_m work_pooled_ag_f work_pooled_hh_m work_pooled_hh_f work_pooled_ot_m work_pooled_ot_f work_pooled_ag_p work_pooled_ot_p work_main_p work_marg_p"
global vars_vd = "vd_com_d_post_off vd_fin_d_ac_soc vd_wat_d_tubewell pct_irr"
global vars_index = "work_p index_vd"


gen yvar = ""
gen fes = ""
gen vce = ""
gen bw = .
gen beta = .
gen se = .
gen tscore = .
gen pvalue = .
gen ci95_lo = .
gen ci95_hi = .
gen beta_hi = .
gen se_hi = .
gen tscore_hi = .
gen pvalue_hi = .
gen ci95_lo_hi = .
gen ci95_hi_hi = .
gen beta_lo = .
gen se_lo = .
gen tscore_lo = .
gen pvalue_lo = .
gen ci95_lo_lo = .
gen ci95_hi_lo = .
gen pval_equal = .
gen ymean = .
gen nobs = .
gen nclust = .
gen rmse = .
local row = 1
local fes = "pca01_id year#stdt"
local vce = "cluster stdtbk"

	// Regression loop
foreach yvar of varlist $vars_demo $vars_labor_pct $vars_vd $vars_index {
	foreach bw in 100 150 {

		// Uninteracted RD
		reghdfe `yvar' /// 1.D#2001.year ///
			1.D#2011.year c.X#i.year c.DX#i.year if inrange(tot_p01,300-`bw',300+`bw') & ///
			pop_mismatch20==0 & lights_diff<20 [aw=aw`bw'], a(`fes') vce(`vce') 
			
		replace yvar = "`yvar'" in `row'
		replace fes = "`fes'" in `row'
		replace vce = "`vce'" in `row'
		replace bw = `bw' in `row'
		replace beta = _b[1.D#2011.year] in `row'
		replace se = _se[1.D#2011.year] in `row'	
		replace tscore = beta/se in `row'
		replace pvalue = 2*ttail(e(df_r),abs(_b[1.D#2011.year]/_se[1.D#2011.year])) in `row'
		replace ci95_lo = beta - se*invttail(e(df_r),0.025) in `row'
		replace ci95_hi = beta + se*invttail(e(df_r),0.025) in `row'
		sum `yvar' if e(sample)							
		replace ymean = r(mean)	in `row'
		replace nobs = e(N)	in `row'
		replace nclust = e(N_clust) in `row'
		replace rmse = e(rmse) in `row'
		local row = `row' + 1
	

		// Heterogenoeous power supply +/- 10 hours/day
		reghdfe `yvar' ///
			1.D#2011.year#1.H 1.D#2011.year#0.H c.X#i.year c.DX#i.year if inrange(tot_p01,300-`bw',300+`bw') & ///
			pop_mismatch20==0 & lights_diff<20 [aw=aw`bw'], a(`fes') vce(`vce') 
			
		replace yvar = "`yvar'" in `row'
		replace fes = "`fes'" in `row'
		replace vce = "`vce'" in `row'
		replace bw = `bw' in `row'
		replace beta_hi = _b[1.D#2011.year#1.H] in `row'
		replace se_hi = _se[1.D#2011.year#1.H] in `row'	
		replace tscore_hi = beta/se in `row'
		replace pvalue_hi = 2*ttail(e(df_r),abs(_b[1.D#2011.year#1.H]/_se[1.D#2011.year#1.H])) in `row'
		replace ci95_lo_hi = beta - se*invttail(e(df_r),0.025) in `row'
		replace ci95_hi_hi = beta + se*invttail(e(df_r),0.025) in `row'
		replace beta_lo = _b[1.D#2011.year#0.H] in `row'
		replace se_lo = _se[1.D#2011.year#0.H] in `row'	
		replace tscore_lo = beta/se in `row'
		replace pvalue_lo = 2*ttail(e(df_r),abs(_b[1.D#2011.year#0.H]/_se[1.D#2011.year#0.H])) in `row'
		replace ci95_lo_lo = beta - se*invttail(e(df_r),0.025) in `row'
		replace ci95_hi_lo = beta + se*invttail(e(df_r),0.025) in `row'
		test 1.D#2011.year#1.H=1.D#2011.year#0.H
		replace pval_equal = r(p) in `row'
		sum `yvar' if e(sample)							
		replace ymean = r(mean)	in `row'
		replace nobs = e(N)	in `row'
		replace nclust = e(N_clust) in `row'
		replace rmse = e(rmse) in `row'
		local row = `row' + 1

	}
}
	
keep yvar-rmse
dropmiss, obs force
compress
save "$results/diff_in_disc_outcomes_census.dta" , replace
	
}

********************************************************************************
********************************************************************************

** 2. Run Census diff-in-diff colllapsing to district level (by HH, to align with NSS)
{
use "$panel/panel_dataset_dd.dta", clear

	// Create index of VD variables
local vd_index_vars = "vd_com_d_phone_ll vd_com_d_post_off vd_edu_n_p_sch vd_edu_n_m_sch vd_edu_n_s_sch " ///
					+ "vd_edu_n_s_s_sch vd_edu_n_coll vd_edu_n_tr_sch vd_fin_d_ac_soc vd_fin_d_bank vd_fin_d_comm_bank " /// 
					+ "vd_fin_d_coop_bank vd_geo_a_irr vd_hea_n_alt_hosp vd_hea_n_fw_cntr vd_hea_n_mcw_cntr vd_hea_n_ph_subcntr " ///
					+ "vd_tra_d_bus vd_wat_d_any"
foreach v of varlist `vd_index_vars' {
	qui sum `v' if tot_p01<=1000 & year==2011
	gen Z`v'11 = (`v' - r(mean))/(r(sd)) if year==2011
	qui sum `v' if tot_p01<=1000 & year==2001
	gen Z`v'01 = (`v' - r(mean))/(r(sd)) if year==2001
}
egen double Zindex_vd11 = rmean(Zvd*11)
egen double Zindex_vd01 = rmean(Zvd*01)
gen index_vd = Zindex_vd11 if year==2011
replace index_vd = Zindex_vd01 if year==2001
assert index_vd!=.
drop Z*

	// Define DD outcomes
global vars_demo = "pct_06 hpca_h_size_avg lit_p"
global vars_labor_pct = "work_pooled_ag_m work_pooled_ag_f work_pooled_hh_m work_pooled_hh_f work_pooled_ot_m work_pooled_ot_f work_pooled_ag_p work_pooled_ot_p work_main_p work_marg_p"
global vars_labor_pct = "work_pooled_ag_m work_pooled_ag_f work_pooled_hh_m work_pooled_hh_f work_pooled_ot_m work_pooled_ot_f work_pooled_ag_p work_pooled_ot_p work_main_p work_marg_p"
global vars_assets = "hpca_assets_tel_l_m_b hpca_assets_tv hpca_assets_bic hpca_assets_smm hpca_assets_none hpca_assets_rt"
global vars_hhold = "hpca_cook_good hpca_msl_bad hpca_mat_f_m hpca_mat_r_gtbw hpca_t_hh_d hpca_msl_elec"
global vars_vd = "vd_com_d_post_off vd_fin_d_ac_soc vd_wat_d_tubewell pct_irr"
global vars_index = "work_p index_vd"

	// Manual collapse to district level
egen double temp_denom = sum(no_hh), by(year st_code dt_code)
foreach v of varlist $vars_demo $vars_labor_pct $vars_assets $vars_hhold $vars_vd $vars_index {
	egen double temp = sum(no_hh * `v' / temp_denom), by(year st_code dt_code)
	replace `v' = temp
	drop temp
}
keep year st_code dt_code $vars_demo $vars_labor_pct $vars_assets $vars_hhold $vars_vd $vars_index treat_x_post
duplicates drop
unique year st_code dt_code
assert r(unique)==r(N)
egen stdt = group(st_code dt_code)

preserve
use "$panel/panel_dataset_dd_nss.dta", clear
keep st_code dt_code exp05*
duplicates drop
tempfile nss
save `nss'
restore
merge m:1 st_code dt_code using `nss'


gen yvar = ""
gen fes = ""
gen vce = ""
gen beta = .
gen se = .
gen tscore = .
gen pvalue = .
gen ci95_lo = .
gen ci95_hi = .
gen ymean = .
gen nobs = .
gen nclust = .
gen rmse = .
local row = 1	

local vce = "cluster stdt"

foreach yvar of varlist $vars_demo $vars_labor_pct $vars_assets $vars_hhold $vars_vd $vars_index {
	foreach fe in 1 2 3  {
		
		if `fe'==1 {
			local fes = "year stdt"
		}
		else if `fe'==2 {
			local fes = "year stdt exp05_st_4ile#c.year exp05_ntl_10ile#c.year"
		}
		else if `fe'==3 {
			local fes = "year stdt exp05_st_4ile#c.year exp05_ntl_10ile#c.year st_code#c.year"
		}
	
		reghdfe `yvar' treat_x_post, a(`fes') vce(`vce')

		replace yvar = "`yvar'" in `row'
		replace fes = "`fes'" in `row'
		replace vce = "`vce'" in `row'
		replace beta = _b[treat_x_post] in `row'
		replace se = _se[treat_x_post] in `row'	
		replace tscore = beta/se in `row'
		replace pvalue = 2*ttail(e(df_r),abs(_b[treat_x_post]/_se[treat_x_post])) in `row'
		replace ci95_lo = beta - se*invttail(e(df_r),0.025) in `row'
		replace ci95_hi = beta + se*invttail(e(df_r),0.025) in `row'
		sum `yvar' if e(sample)							
		replace ymean = r(mean)	in `row'
		replace nobs = e(N)	in `row'
		replace nclust = e(N_clust) in `row'
		replace rmse = e(rmse) in `row'
		local row = `row' + 1

	}
}

keep yvar-rmse
dropmiss, obs force
compress
save "$results/dd_outcomes_census_distlevel.dta", replace
	
}

********************************************************************************
********************************************************************************

** 3. Run Census diff-in-diff at village level
{
use "$panel/panel_dataset_dd.dta", clear

	// Create index of VD variables
local vd_index_vars = "vd_com_d_phone_ll vd_com_d_post_off vd_edu_n_p_sch vd_edu_n_m_sch vd_edu_n_s_sch " ///
					+ "vd_edu_n_s_s_sch vd_edu_n_coll vd_edu_n_tr_sch vd_fin_d_ac_soc vd_fin_d_bank vd_fin_d_comm_bank " /// 
					+ "vd_fin_d_coop_bank vd_geo_a_irr vd_hea_n_alt_hosp vd_hea_n_fw_cntr vd_hea_n_mcw_cntr vd_hea_n_ph_subcntr " ///
					+ "vd_tra_d_bus vd_wat_d_any"
foreach v of varlist `vd_index_vars' {
	qui sum `v' if tot_p01<=1000 & year==2011
	gen Z`v'11 = (`v' - r(mean))/(r(sd)) if year==2011
	qui sum `v' if tot_p01<=1000 & year==2001
	gen Z`v'01 = (`v' - r(mean))/(r(sd)) if year==2001
}
egen double Zindex_vd11 = rmean(Zvd*11)
egen double Zindex_vd01 = rmean(Zvd*01)
gen index_vd = Zindex_vd11 if year==2011
replace index_vd = Zindex_vd01 if year==2001
assert index_vd!=.
drop Z*

	// Define DD outcomes
global vars_demo = "pct_06 lit_p"
global vars_labor_pct = "work_pooled_ag_m work_pooled_ag_f work_pooled_hh_m work_pooled_hh_f work_pooled_ot_m work_pooled_ot_f work_pooled_ag_p work_pooled_ot_p work_main_p work_marg_p"
global vars_vd = "vd_com_d_post_off vd_fin_d_ac_soc vd_wat_d_tubewell pct_irr"
global vars_index = "work_p index_vd"

preserve
use "$panel/panel_dataset_dd_nss.dta", clear
keep st_code dt_code exp05*
duplicates drop
tempfile nss
save `nss'
restore
merge m:1 st_code dt_code using `nss'

gen yvar = ""
gen fes = ""
gen vce = ""
gen beta = .
gen se = .
gen tscore = .
gen pvalue = .
gen ci95_lo = .
gen ci95_hi = .
gen ymean = .
gen nobs = .
gen nclust = .
gen rmse = .
local row = 1	

local vce = "cluster stdt"

foreach yvar of varlist $vars_demo $vars_labor_pct $vars_vd $vars_index {
	
	foreach fe in 1 2 3  {
		
		if `fe'==1 {
			local fes = "year pca01_id"
		}
		else if `fe'==2 {
			local fes = "year pca01_id exp05_st_4ile#c.year exp05_ntl_10ile#c.year"
		}
		else if `fe'==3 {
			local fes = "year pca01_id exp05_st_4ile#c.year exp05_ntl_10ile#c.year st_code#c.year"
		}
		
		reghdfe `yvar' treat_x_post, a(`fes') vce(`vce')

		replace yvar = "`yvar'" in `row'
		replace fes = "`fes'" in `row'
		replace vce = "`vce'" in `row'
		replace beta = _b[treat_x_post] in `row'
		replace se = _se[treat_x_post] in `row'	
		replace tscore = beta/se in `row'
		replace pvalue = 2*ttail(e(df_r),abs(_b[treat_x_post]/_se[treat_x_post])) in `row'
		replace ci95_lo = beta - se*invttail(e(df_r),0.025) in `row'
		replace ci95_hi = beta + se*invttail(e(df_r),0.025) in `row'
		sum `yvar' if e(sample)							
		replace ymean = r(mean)	in `row'
		replace nobs = e(N)	in `row'
		replace nclust = e(N_clust) in `row'
		replace rmse = e(rmse) in `row'
		local row = `row' + 1

	}
}

keep yvar-rmse
dropmiss, obs force
compress
save "$results/dd_outcomes_census_pooled.dta", replace
	
}

********************************************************************************
********************************************************************************
